# ========================================================================= #
# Project: Lexical Ambiguity in Political Rhetoric (BJPolS)
# - Script: Select examples to illustrate sentence embedding results
# - Author: Patrick Kraft (patrickwilli.kraft@uc3m.es)
# ========================================================================= #


# Load packages and custom functions --------------------------------------

source(here::here("code/00-func.R"))


# Load sentences & embeddings ---------------------------------------------

# Assemble the analysis data: sentence-level data, the embedding columns
# (read without a header row), and one logical flag per moral foundation
# derived from the dictionary in in/mfd2.0.dic. Intermediates live inside
# local() so that only `df` is created in the calling environment.
df <- local({
  sent_emb <- bind_cols(
    read_csv(here("out/sentences.csv")),
    read_csv(here("out/embeddings.csv"), col_names = FALSE)
  )

  mfd <- dictionary(file = here("in/mfd2.0.dic"), format = "LIWC")

  # Dictionary-key counts per sentence (each sentence is one document)
  mfd_counts <- convert(
    dfm(tokens_lookup(tokens(corpus(sent_emb$sentence)), dictionary = mfd)),
    "data.frame"
  )

  # A foundation is flagged when its virtue or vice key matched at least once
  mfd_flags <- transmute(
    mfd_counts,
    Care      = (care.virtue + care.vice) > 0,
    Fairness  = (fairness.virtue + fairness.vice) > 0,
    Loyalty   = (loyalty.virtue + loyalty.vice) > 0,
    Authority = (authority.virtue + authority.vice) > 0,
    Sanctity  = (sanctity.virtue + sanctity.vice) > 0
  )

  bind_cols(sent_emb, mfd_flags)
})


# Example: Sample statement by Hillary Clinton ----------------------------

# Pairwise cosine similarities among the 2016 debate sentences that contain
# the word "share". Column Vj holds each sentence's similarity to the j-th
# filtered sentence. Rows are sorted by V4 in descending order, so the 4th
# filtered sentence (similarity 1 with itself) is the reference statement.
tmp <- local({
  share_2016 <- df %>%
    filter(grepl("\\<share\\>", sentence),
           type == "e) US Presidential Debates",
           year == 2016,
           speaker == "President")

  # V4 is hard-coded below, so fail early with a clear message if the filter
  # keeps fewer than four sentences
  if (nrow(share_2016) < 4L) {
    stop("Expected at least 4 matching debate sentences, found ",
         nrow(share_2016), call. = FALSE)
  }

  # Transpose so that sentences are columns: lsa::cosine() compares the
  # columns of a matrix
  sims <- share_2016 %>%
    select(starts_with("X")) %>%
    t() %>%
    lsa::cosine()

  # Name the columns explicitly instead of relying on as_tibble()'s deprecated
  # compatibility repair for unnamed matrices (same V1..Vn names, no warning)
  colnames(sims) <- paste0("V", seq_len(ncol(sims)))

  share_2016 %>%
    bind_cols(as_tibble(sims)) %>%
    arrange(desc(V4))
})

# Export the example table: the reference sentence, its two most similar
# sentences, and the two least similar ones (tmp is sorted by V4, descending).
bind_rows(head(tmp, 3), tail(tmp, 2)) %>%
  # NA labels leave the first column blank on the second row of each group
  transmute(Description = c("Original", "Similar", NA, "Dissimilar", NA),
            Speaker = recode_factor(party,
                                    Democratic = "Clinton",
                                    Republican = "Trump"),
            Year = as.character(year),
            Sentence = sentence,
            Similarity = V4) %>%
  xtable(caption = "Sample statement by Hillary Clinton during the Presidential debates that mentions the term 'share' along with other debate statements that mention the same term and are most similar/dissimilar to Clinton's statement according to the cosine similarity of sentence embeddings.",
         label = "tab:debates",
         # One entry per column plus a leading one for the (suppressed) row names
         align = "llllp{3in}c") %>%
  print(file = here("out/tab02-debates.tex"),
        table.placement = "ht",
        # Rules: top, below header, after the original, after the similar
        # sentences, and at the bottom of the table
        hline.after = c(-1, 0, 1, 3, nrow(.)),
        include.rownames = FALSE)
